#!/usr/bin/ruby -w

GC.disable # this gains us 0.2 second

# Synthetic device id under which block special files are filed: FSTab
# registers a pseudo file system with this id, and CachedFile uses it as
# the `dev` of block devices.
$BDEV_ID = 0xFFFF0002
# Name used for that pseudo file system, both as its device file and as its
# fstype.
$BDEV_NAME = 'bdev'

# Tell whether +page_flags+ carries at least one of the flags R, A or M.
# Returns true or false.
def referenced?(page_flags)
  [?R, ?A, ?M].any? { |flag| page_flags.include?(flag) }
end

#
# Class representing a fstab/mtab entry extended with device_id.
#
class FileSystem

  attr_reader :device_id, :device_file, :mount_point, :fstype

  # Build an entry and work out its device id.
  #
  # device_file:: first field of the fstab/mtab line (or $BDEV_NAME)
  # mount_point:: second field of the line
  # fstype::      third field of the line
  #
  # The device id is chosen as follows:
  # * $BDEV_ID when device_file is the pseudo name $BDEV_NAME;
  # * the rdev of device_file when that path exists;
  # * otherwise the dev of mount_point, in which case the stored device
  #   name has every '/' replaced by '-'.
  #
  # Raises whatever File.stat raises when the path being stat'ed is missing.
  def initialize(device_file, mount_point, fstype)
    @device_file = device_file
    @mount_point = mount_point
    @fstype      = fstype

    @device_id =
      if @device_file == $BDEV_NAME
        $BDEV_ID
      elsif File.exist?(@device_file)
        File.stat(@device_file).rdev
      else
        # Use a non-destructive tr: the caller's string must not be modified
        # (and may be frozen).
        @device_file = @device_file.tr('/', '-')
        File.stat(@mount_point).dev
      end
  end

  # Tab separated "major:minor", device file, mount point and fstype.
  def to_s
    "#{major}:#{minor}\t#{device_file}\t#{mount_point}\t#{fstype}"
  end

  # Everything above the low 8 bits of the device id.
  def major
    @device_id >> 8
  end

  # Low 8 bits of the device id.
  def minor
    @device_id & 0xff
  end

end

#
# Class representing a collection of fstab/mtab entries extended with device_id
#
class FSTab

  # File system types whose entries are never recorded.  Only the keys
  # matter; every value is 0.
  NOFS = Hash[%w[
    tmpfs  ramfs   sysfs      proc
    rootfs debugfs securityfs sockfs
    pipefs futexfs inotifyfs  eventpollfs
    devpts mqueue  usbfs      binfmt_misc
  ].map { |type| [type, 0] }]

  # Read the entries of +file+ (/etc/fstab, /etc/mtab or /proc/mounts) and
  # index them by device id.  The first entry seen for a device id wins.
  def initialize(file = '/etc/fstab')
    @source_file = file
    @fs_by_id    = {}

    File.foreach(file) do |line|
      dev, mp, fstype, options = line.split

      # The checks run in this order on purpose: the field-count test comes
      # first so that the later ones never see a nil field.
      skip = options.nil? ||                # fewer than four fields
             line[0] == ?# ||               # comment line
             mp[0] != ?/ ||                 # skip pseudo fs
             NOFS.key?(fstype) ||           # uninteresting fs type
             options.include?('loop')       # no need to readahead loop fs
      next if skip

      fs = FileSystem.new(dev, mp, fstype)
      @fs_by_id[fs.device_id] = fs unless @fs_by_id[fs.device_id]
    end

    # in case we get device_id=0 for hotplug devices.
    @fs_by_id.delete(0)

    # the filesystem for block special files
    @fs_by_id[$BDEV_ID] = FileSystem.new($BDEV_NAME, '', $BDEV_NAME)
  end

  # Yield every recorded FileSystem; returns self.
  def each
    @fs_by_id.each_value { |fs| yield(fs) }
    self
  end

  # Look a FileSystem up by device id; nil when unknown.
  def [](device_id)
    @fs_by_id[device_id]
  end

  # The source file name followed by one line per recorded entry.
  def to_s
    @fs_by_id.values.inject("#{@source_file}\n") do |text, fs|
      text << "#{fs.to_s}\n"
    end
  end

end


#
# Class representing one cached file
#
class CachedFile

  # Next sequence number to hand out.  Initialised here so that
  # CachedFile.new also works when reset_seq has never been called.
  @@seq = 0

  # Restart the sequence numbering of new instances at 0.
  def CachedFile.reset_seq
    @@seq = 0
  end

  # Return the shared read/write handle on /proc/filecache, opening it on
  # first use.  Right after opening, the command 'set private' is written
  # and the handle is rewound.
  def CachedFile.filecache
    unless defined? @@filecache
      @@filecache = File.open('/proc/filecache', File::RDWR)
      @@filecache.syswrite('set private')
      @@filecache.rewind
    end
    @@filecache
  end

  attr_reader   :file, :seq, :pages
  attr_accessor :dev,  :ino, :size, :state, :refcnt
  attr_accessor :process, :uid, :accessed
  attr_writer   :cached, :cachedp

  # Create an entry for the path +file+ with no pages recorded yet.
  # Each new instance receives the next sequence number.
  def initialize(file)
    @file    = file

    @cached  = nil
    @cachedp = nil
    @collected = nil
    @process = ''
    @pages   = Hash.new

    @seq     = @@seq
    @@seq    = @@seq + 1
  end

  # Query /proc/filecache for this file and record its page ranges.
  #
  # Each line read back is split into idx, len, state and refcnt and handed
  # to the block; the range is recorded only when the block returns a true
  # value.  Afterwards the collected size is fixed at 4 per recorded page.
  def init_pages
    filecache = CachedFile.filecache
    filecache.syswrite @file
    filecache.rewind
    filecache.each_line do |line|
      idx, len, state, refcnt = line.split
      idx, len, refcnt = idx.to_i, len.to_i, refcnt.to_i
      next unless yield(idx, len, state, refcnt)
      add_page_range idx, len
    end
    @collected = 4 * @pages.size
  end

  # Mark the +len+ pages starting at page index +idx+ as present.
  def add_page_range(idx, len = 1)
    len.times { |i| @pages[idx + i] = len }
  end

  # Fill dev, ino and size from the file on disk (lstat, so symlinks are
  # not followed).  Block devices get the synthetic $BDEV_ID as dev.
  # The size is rounded up to whole KB.
  def init_attrs
    stat = File.lstat(@file)
    @dev  = stat.blockdev? ? $BDEV_ID : stat.dev
    @ino  = stat.ino
    @size = (stat.size + 1023) / 1024
  end

  # Cached size; falls back to 4 per recorded page when never assigned.
  def cached
    @cached || 4 * @pages.size    # TODO: respect PAGE_SIZE
  end

  # Collected size; falls back to 4 per recorded page when init_pages has
  # not run.
  def collected
    @collected || 4 * @pages.size
  end

  # Cached share of the file as an integer percentage in 0..100.
  #
  # An explicitly assigned value wins; otherwise it is derived once from
  # cached and size and then remembered.  A missing or zero size gives 0.
  def cachedp
    return @cachedp if @cachedp

    @cachedp =
      if @size.to_i > 0
        # Scale to percent before truncating; cap at 100.
        [(100 * cached / @size.to_f).to_i, 100].min
      else
        0
      end
  end

  def blockdev?()  @dev == $BDEV_ID  end
  def empty?()     @pages.empty?     end

  # a = a | b
  def union(b)
    @pages.update b.pages
    self
  end

  # a = a & b
  def intersection(b)
    @pages.delete_if { |k, _v| not b.pages.include?(k) }
    self
  end

  # a = a - b
  def difference(b)
    @pages.delete_if { |k, _v| b.pages.include?(k) }
    self
  end

  # Collapse the recorded page indexes into a sorted list of
  # [start, length] pairs, one per run of consecutive pages.
  def page_ranges
    ranges = []
    start  = 0
    length = 0

    @pages.keys.sort.each do |offset|
      if start + length == offset
        # offset continues the current run
        length += 1
      else
        ranges << [start, length] if length > 0
        start  = offset
        length = 1
      end
    end

    ranges << [start, length] if length > 0
    ranges
  end

  # The file name on one line, then one "start<TAB>length" line per page
  # range, then an empty line.  (An alternative that named ext3 files by
  # "<mount_point>/.inode/<ino>" was already disabled and is not emitted.)
  def to_s
    s = "#{@file}\n"
    page_ranges.each { |span| s << "#{span[0]}\t#{span[1]}\n" }
    s << "\n"
  end

end

#
# Class representing an ordered list of cached files
#
class CachedFileList

  # Mounted file systems, used to map device ids to mount points and
  # device files.
  @@fstab = FSTab.new '/etc/mtab'

  def initialize
    @cfile_by_name = Hash.new
  end

  # a = a | b
  # Files only in b are adopted as-is; files in both have their pages merged.
  def union(b)
    b.each do |bf|
      af = @cfile_by_name[bf.file]
      if af
        af.union bf
      else
        @cfile_by_name[bf.file] = bf
      end
    end
    self
  end

  # a = a & b
  # Files missing from b are dropped; the others keep only the pages that b
  # has too.
  def intersection(b)
    # delete_if instead of deleting from the hash inside each_value.
    @cfile_by_name.delete_if do |_name, af|
      bf = b[af.file]
      af.intersection bf if bf
      !bf
    end
    self
  end

  # a = a - b
  # Pages also present in b are removed; no file entry is removed.
  def difference(b)
    @cfile_by_name.each_value do |af|
      bf = b[af.file]
      af.difference bf if bf
    end
    self
  end

  # Yield every cached file in hash order; returns self.
  def each
    @cfile_by_name.each_value do |cfile|
      yield cfile
    end
    self
  end

  # Yield every cached file ordered by its sequence number; returns self.
  def each_ordered
    @cfile_by_name.values.sort { |x, y| x.seq <=> y.seq }.each do |cfile|
      yield cfile
    end
    self
  end

  # Look a cached file up by path; nil when absent.
  def [](file)
    @cfile_by_name[file]
  end

  # Remove the entry for the path +file+; returns self.
  def delete(file)
    @cfile_by_name.delete file
    self
  end

  # Remove every cached file for which the block returns true; returns self.
  def delete_if
    @cfile_by_name.delete_if { |_key, cfile| yield(cfile) }
    self
  end

  # Take a snapshot of /proc/filecache.
  #
  # +condition+ is Ruby source evaluated once per page range with idx, len,
  # state and refcnt in scope; only ranges for which it is true are kept.
  # NOTE(review): this is a plain eval of a caller supplied string (the
  # command line passes -s CONDITION straight in) -- only use it with
  # trusted input.
  def snapshot(condition = 'true')
    filecache = CachedFile.filecache

    read_filecache_index filecache

    @cfile_by_name.each_value do |cfile|
      if cfile.cached > 0
        cfile.init_pages { |idx, len, state, refcnt| eval condition }
      end
    end

    self
  end

  # get the cached file list from /proc/filecache.
  #
  # Writes the command 'ls', then reads the listing back.  A line starting
  # with '#' names the columns of the rows after it; every other line
  # becomes a CachedFile unless it is filtered out below.
  def read_filecache_index(filecache)
    CachedFile.reset_seq
    headers = []
    filecache.syswrite('ls')
    filecache.rewind
    filecache.each_line do |line|
      if line[0] == ?#
        headers = line.split
        headers.shift
        next
      end

      fields = {}
      tmp = line.split
      headers.each_index { |index| fields[headers[index]] = tmp[index] }

      file = fields['file']
      dev  = fields['dev']
      next if file == '(noname)'
      if file.include? ?\\
        file.gsub! '\011', "\011"  # ht
        file.gsub! '\012', "\012"  # nl
        file.gsub! '\040', "\040"  # sp
        # NOTE(review): both arguments below are a single backslash, so this
        # substitution changes nothing.  It presumably was meant to decode
        # an escaped backslash -- confirm the escape the kernel emits before
        # changing it.
        file.gsub! '\\',   "\\"    # \
      end

      if file =~ /\([0-9a-f]{2}:[0-9a-f]{2}\)/ then
        # handle block device
        # - transform file name from digital form to real ones
        fs  = @@fstab[file.delete('(:)').hex]
        next unless fs
        file = fs.device_file
        dev  = $BDEV_ID
      else
        # handle normal files
        # - expand file name to full path name
        # - ignore dirs/symlinks
        dev = dev[0,5].delete(':').hex
        fs = @@fstab[dev]
        next unless fs
        file = fs.mount_point + file unless fs.mount_point == '/'
        next unless File.file?(file)
      end

      cfile         = CachedFile.new file
      cfile.dev     = dev
      cfile.state   = fields['state']
      cfile.ino     = fields['ino'].to_i
      cfile.size    = fields['size'].to_i
      cfile.cached  = fields['cached'].to_i
      cfile.cachedp = fields['cached%'].to_i
      cfile.refcnt  = fields['refcnt'].to_i
      cfile.process = fields['process']
      cfile.uid     = fields['uid'].to_i
      cfile.accessed = fields['accessed'].to_i
      @cfile_by_name[file] = cfile
    end # filecache.each_line
  end

  # Load the pre-saved snapshot of /proc/filecache from a file or dir.
  # For a directory every regular file directly inside it is loaded.
  # Prints a message and exits with status 1 when +path+ is neither.
  def load(path)
    if File.file? path
      load_file path
    elsif File.directory? path
      Dir.foreach(path) do |entry|
        full_path = "#{path}/#{entry}"
        next unless File.stat(full_path).file?
        load_file full_path
      end
    else
      $stderr.puts "#{path} not a file or directory."
      exit 1
    end
    self
  end

  # Load cached files' info from a pre-saved file.
  #
  # The format is the one written by dump: a line starting with '/' names a
  # file, the "idx len" lines after it are its page ranges.  Files that no
  # longer exist, or that live on a device unknown to the mount table, are
  # not added to the list.
  def load_file(file)

    CachedFile.reset_seq

    cfile = nil
    File.foreach(file) do |line|
      # Strip the line terminator first, otherwise a blank line is never
      # seen as empty.
      line.chomp!
      next if line.empty?
      next if line[0] == ?#

      if line[0] == ?/
        cfile = CachedFile.new line
        next unless File.exist?(line) # sorry for the creat and abandon fuss
        cfile.init_attrs
        @cfile_by_name[cfile.file] = cfile if @@fstab[cfile.dev]
      elsif line =~ /^\d+\s+\d+/
        # Ignore page ranges that come before any file line.
        next unless cfile
        idx, len = line.split
        cfile.add_page_range idx.to_i, len.to_i
      end

    end # File.foreach
    self
  end

  # Write the list into the directory +dir+, one output file per device,
  # named after the basename of the device file.  Cached files are written
  # in sequence order.
  def dump(dir)
    fds = Hash.new

    begin
      @cfile_by_name.values.sort { |x, y| x.seq <=> y.seq }.each do |cfile|
        fs = @@fstab[cfile.dev]
        next unless fs
        fds[cfile.dev] ||= File.open("#{dir}/#{File.basename fs.device_file}",
                                      File::CREAT | File::TRUNC | File::RDWR)
        fds[cfile.dev].print cfile.to_s
      end
    ensure
      # Close the output files even when writing failed half way.
      fds.each_value { |fd| fd.close }
    end
    self
  end

end

#
# filecache - a cmdline tool for /proc/filecache related operations
#

require 'optparse'
require 'ostruct'
require 'fileutils'
require 'readline'

# Verbosity level; changed by the -v/--verbose option.
$verbose = 0
# The list of cached files that the command line options build up and
# operate on, in the order the options are given.
$cfiles = CachedFileList.new

# Command line definition.  Every option acts immediately from its handler,
# so options take effect in the order they appear on the command line and
# operate on the global $cfiles list.
opts = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options]"
  opts.separator ''
  opts.separator 'Available options:'

  opts.on('-v', '--verbose [LEVEL]', 'set level of verbosity') do |level|
    $verbose = (level || 1).to_i
  end

  opts.on('-q', '--query [FILE]',
                'query cached pages of FILE in /proc/filecache.') do |file|
    filecache = CachedFile.filecache
    filecache.syswrite(file || 'ls')
    filecache.rewind
    filecache.each_line { |line| puts line }
  end

  opts.on('-D', '--drop [WHAT]',
                'drop cached pages or slabs.') do |what|
    # Without an argument both caches are dropped.
    if what.nil? || what == 'pagecache'
      CachedFile.filecache.syswrite "drop pagecache"
    end
    if what.nil? || what == 'slabcache'
      CachedFile.filecache.syswrite "drop slabcache"
    end
  end

  opts.on('-s', '--snapshot [CONDITION]',
    'load from /proc/filecache filtering pages with CONDITION.') do |condition|
    $cfiles.snapshot(condition || 'true')
  end

  opts.on('-l', '--load SRC',
                'load the pre-saved snapshot from SRC file/dir.') do |src|
    $cfiles.load src
  end

  opts.on('-d', '--dump DST', 'dump the snapshot to the DST dir.') do |dst|
    Dir.mkdir(dst) unless File.exist?(dst)
    $cfiles.dump dst
  end

  opts.on('-u', '--union SRC', 'the set union.') do |src|
    $cfiles.union CachedFileList.new.load(src)
  end

  opts.on('-i', '--intersection SRC',
                'the set intersect between pre-snapshort and SRC.') do |src|
    $cfiles.intersection CachedFileList.new.load(src)
  end

  opts.on('-f', '--difference SRC',
                'the set difference between pre-snapshort and SRC.') do |src|
    $cfiles.difference CachedFileList.new.load(src)
  end

  opts.on('-F', '--rdifference SRC',
                'the set difference between pre-snapshort and SRC.') do |src|
    # Reverse difference: SRC minus the current list becomes the new list.
    c = CachedFileList.new.load(src)
    c.difference $cfiles
    $cfiles = c
  end

  opts.on('--limit-size KB', 'delete files larger than KB.') do |kb|
    # A local, not a constant: the option may be given more than once and
    # must not leak a global KB constant.
    limit = kb.to_i
    $cfiles.delete_if { |f| f.size > limit }
  end

  opts.on('--prune-path PATTERN', 'exclude files matching PATTERN.') do |pattern|
    regexp = Regexp.new(pattern)
    $cfiles.delete_if { |f| regexp =~ f.file }
  end

  opts.on('--delete-if CONDITION', 'delete files satisfying CONDITION.') do |condition|
    # NOTE(review): CONDITION is evaluated as Ruby code with the cached file
    # available as `f` -- command line input is trusted here.
    $cfiles.delete_if { |f| eval condition }
  end

  opts.on('--complete PERCENT', 'complete to full file if cachedp > PERCENT') do |percent|
    cachedp = percent.to_i

    $cfiles.each do |f|

      # Paths containing sbin, bin or lib count double, i.e. they are
      # completed at half the threshold.
      scale = f.file =~ /(sbin|bin|lib)/ ? 2 : 1

      if scale * f.cached * 100 > f.size * cachedp
        f.add_page_range(0, (f.size + 3) / 4)
      end

    end
  end

  opts.on('--list', 'list size, cached, cachedp, collected, collectedp, process, dev, filename') do
    mtab = FSTab.new '/etc/mtab'
    $cfiles.each do |f|
      # `|1` keeps the divisors non-zero.
      printf "%8d %8d %8d ", f.size, f.cached, f.cached * 100 / (f.size|1)
      printf "%8d %8d ",  f.collected, f.collected * 100 / (f.cached|1)
      printf "%-16s %-10s %s\n", f.process, mtab[f.dev].device_file, f.file
    end
  end

  opts.on('--mtab', 'show mounts info') do
    mtab = FSTab.new '/etc/mtab'
    puts mtab.to_s
    fstab = FSTab.new '/etc/fstab'
    puts fstab.to_s
    mounts = FSTab.new '/proc/mounts'
    puts mounts.to_s
  end

  opts.on('--account', 'show some accounting numbers') do
    files = 0
    size = 0
    cached = 0
    ranges = 0
    collected = 0
    $cfiles.each do |f|
      files += 1
      size += f.size
      cached += f.cached
      ranges += f.page_ranges.size
      collected += f.collected
    end
    printf "total     %8d MiB\t%8d files\n", size >> 10, files
    printf "cached    %8d MiB\t%15s\t%3d%%\n", cached >> 10, ' ', cached * 100 / (size|1)
    printf "collected %8d MiB\t%8d ranges\t%3d%%\n", collected >> 10, ranges, collected * 100 / (cached|1)
  end

  opts.on('--defrag HABITAT', 'do poor man\'s defrag via dir HABITAT.') do |habitat|

    unless 'Ss016'.include? `runlevel`[2]
      puts 'WARNING: defraging in multi-user mode may lead to data corruption.'
      print 'Continue anyway? [y/N]'
      exit if $stdin.readline[0] != ?y
    end

    mtab = FSTab.new '/etc/mtab'
    total_files = total_size = 0
    moved_files = moved_size = 0

    $cfiles.each_ordered do |f|
      fstat = File.lstat f.file
      total_files += 1
      total_size  += fstat.size

      # skip obscure file
      next unless fstat.file?

      # skip multi-linked file
      next unless fstat.nlink == 1

      # skip recently modified file
      next unless Time.now - fstat.mtime > 86400 # 24 hour

      # skip opened file (is it working?)
      # The lock probe runs inside the File.open block, but the decision to
      # skip is taken out here: a `next` inside that block would only leave
      # the block and the file would be defragmented anyway.
      lockable = File.open(f.file) do |fd|
        got_lock = fd.flock(File::LOCK_EX | File::LOCK_NB)
        fd.flock(File::LOCK_UN) if got_lock
        got_lock
      end
      unless lockable
        puts "skiping opened file #{f.file}" if $verbose >= 2
        next
      end

      # check habitat dir existence
      hdir  = mtab[f.dev].mount_point + '/' + habitat
      hfile = hdir + '/' + File.basename(f.file)
      unless File.directory? hdir
        $stderr.puts "defrag habitat directory #{hdir} do not exist"
        exit 1
      end

      moved_files += 1
      moved_size  += fstat.size

      # poor man's defrag ;-)
      # Copy into the habitat dir, verify the copy, then swap it in place
      # of the original through a hard link.
      begin
        FileUtils.cp f.file, hfile, :preserve => true
        tstat = File.lstat hfile
        raise 'not in same filesystem' if fstat.dev != tstat.dev
        raise 'file changed' if fstat.size != tstat.size ||
                                fstat.mode != tstat.mode ||
                                fstat.mtime != tstat.mtime
        FileUtils.rm f.file
        FileUtils.ln hfile, f.file
        FileUtils.rm hfile
        puts "#{f.file} <=> #{hfile}" if $verbose >= 11
      rescue StandardError => msg
        # StandardError only: exit requests and interrupts must pass through.
        $stderr.puts "Failed to defrag file #{f.file} => #{hfile}: #{msg}."
        exit 1
      end
    end

    if $verbose >= 2
      puts "moved  #{moved_files} files / #{moved_size >> 20} MB"
      puts "out of #{total_files} files / #{total_size >> 20} MB"
    end

  end

  opts.on_tail('-h', '--help', 'Show this message.') do
    puts opts
  end
end

# With arguments: run the options in order.  Without: interactive mode,
# where each line typed at the prompt is written to /proc/filecache and
# the response is printed.
if ARGV.size > 0
  opts.parse(ARGV)
else
  # A plain local instead of a class variable: class variables cannot be
  # used at the top level on current Ruby versions.
  filecache = File.open('/proc/filecache', File::RDWR)
  filecache.syswrite('set private')
  while buf = Readline.readline("> ", true)
    # words = buf.split
    filecache.rewind
    filecache.syswrite(buf)
    filecache.each_line do |line|
      puts line
    end
  end
end


# vim: sw=2 ts=2 et
